{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "\n",
    "##### 1. Считать данные с помощью pandas. Данные находятся в файле kc_house_data.csv"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "# df = pd.read_csv('kc_house_data.csv', sep=',')\n",
    "df = pd.read_csv(\"https://gbcdn.mrgcdn.ru/uploads/asset/5964767/attachment/6c023c8fce9d5cc694e5d6ce941951ae.csv\", sep=\",\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### 2. Вывести на экран первые 5 строк"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>date</th>\n",
       "      <th>price</th>\n",
       "      <th>bedrooms</th>\n",
       "      <th>bathrooms</th>\n",
       "      <th>sqft_living</th>\n",
       "      <th>sqft_lot</th>\n",
       "      <th>floors</th>\n",
       "      <th>waterfront</th>\n",
       "      <th>view</th>\n",
       "      <th>...</th>\n",
       "      <th>grade</th>\n",
       "      <th>sqft_above</th>\n",
       "      <th>sqft_basement</th>\n",
       "      <th>yr_built</th>\n",
       "      <th>yr_renovated</th>\n",
       "      <th>zipcode</th>\n",
       "      <th>lat</th>\n",
       "      <th>long</th>\n",
       "      <th>sqft_living15</th>\n",
       "      <th>sqft_lot15</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>7129300520</td>\n",
       "      <td>20141013T000000</td>\n",
       "      <td>221900.0</td>\n",
       "      <td>3</td>\n",
       "      <td>1.00</td>\n",
       "      <td>1180</td>\n",
       "      <td>5650</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>7</td>\n",
       "      <td>1180</td>\n",
       "      <td>0</td>\n",
       "      <td>1955</td>\n",
       "      <td>0</td>\n",
       "      <td>98178</td>\n",
       "      <td>47.5112</td>\n",
       "      <td>-122.257</td>\n",
       "      <td>1340</td>\n",
       "      <td>5650</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>6414100192</td>\n",
       "      <td>20141209T000000</td>\n",
       "      <td>538000.0</td>\n",
       "      <td>3</td>\n",
       "      <td>2.25</td>\n",
       "      <td>2570</td>\n",
       "      <td>7242</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>7</td>\n",
       "      <td>2170</td>\n",
       "      <td>400</td>\n",
       "      <td>1951</td>\n",
       "      <td>1991</td>\n",
       "      <td>98125</td>\n",
       "      <td>47.7210</td>\n",
       "      <td>-122.319</td>\n",
       "      <td>1690</td>\n",
       "      <td>7639</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>5631500400</td>\n",
       "      <td>20150225T000000</td>\n",
       "      <td>180000.0</td>\n",
       "      <td>2</td>\n",
       "      <td>1.00</td>\n",
       "      <td>770</td>\n",
       "      <td>10000</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>6</td>\n",
       "      <td>770</td>\n",
       "      <td>0</td>\n",
       "      <td>1933</td>\n",
       "      <td>0</td>\n",
       "      <td>98028</td>\n",
       "      <td>47.7379</td>\n",
       "      <td>-122.233</td>\n",
       "      <td>2720</td>\n",
       "      <td>8062</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2487200875</td>\n",
       "      <td>20141209T000000</td>\n",
       "      <td>604000.0</td>\n",
       "      <td>4</td>\n",
       "      <td>3.00</td>\n",
       "      <td>1960</td>\n",
       "      <td>5000</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>7</td>\n",
       "      <td>1050</td>\n",
       "      <td>910</td>\n",
       "      <td>1965</td>\n",
       "      <td>0</td>\n",
       "      <td>98136</td>\n",
       "      <td>47.5208</td>\n",
       "      <td>-122.393</td>\n",
       "      <td>1360</td>\n",
       "      <td>5000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1954400510</td>\n",
       "      <td>20150218T000000</td>\n",
       "      <td>510000.0</td>\n",
       "      <td>3</td>\n",
       "      <td>2.00</td>\n",
       "      <td>1680</td>\n",
       "      <td>8080</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>8</td>\n",
       "      <td>1680</td>\n",
       "      <td>0</td>\n",
       "      <td>1987</td>\n",
       "      <td>0</td>\n",
       "      <td>98074</td>\n",
       "      <td>47.6168</td>\n",
       "      <td>-122.045</td>\n",
       "      <td>1800</td>\n",
       "      <td>7503</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 21 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "           id             date     price  bedrooms  bathrooms  sqft_living  \\\n",
       "0  7129300520  20141013T000000  221900.0         3       1.00         1180   \n",
       "1  6414100192  20141209T000000  538000.0         3       2.25         2570   \n",
       "2  5631500400  20150225T000000  180000.0         2       1.00          770   \n",
       "3  2487200875  20141209T000000  604000.0         4       3.00         1960   \n",
       "4  1954400510  20150218T000000  510000.0         3       2.00         1680   \n",
       "\n",
       "   sqft_lot  floors  waterfront  view  ...  grade  sqft_above  sqft_basement  \\\n",
       "0      5650     1.0           0     0  ...      7        1180              0   \n",
       "1      7242     2.0           0     0  ...      7        2170            400   \n",
       "2     10000     1.0           0     0  ...      6         770              0   \n",
       "3      5000     1.0           0     0  ...      7        1050            910   \n",
       "4      8080     1.0           0     0  ...      8        1680              0   \n",
       "\n",
       "   yr_built  yr_renovated  zipcode      lat     long  sqft_living15  \\\n",
       "0      1955             0    98178  47.5112 -122.257           1340   \n",
       "1      1951          1991    98125  47.7210 -122.319           1690   \n",
       "2      1933             0    98028  47.7379 -122.233           2720   \n",
       "3      1965             0    98136  47.5208 -122.393           1360   \n",
       "4      1987             0    98074  47.6168 -122.045           1800   \n",
       "\n",
       "   sqft_lot15  \n",
       "0        5650  \n",
       "1        7639  \n",
       "2        8062  \n",
       "3        5000  \n",
       "4        7503  \n",
       "\n",
       "[5 rows x 21 columns]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "stroki = df.head(5)\n",
    "stroki"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### 3. Создать новый признак price_per_sq_lot, который будет содержать avg = среднюю стоимость за один кв. метр общей площади"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0         39.274336\n",
       "1         74.288870\n",
       "2         18.000000\n",
       "3        120.800000\n",
       "4         63.118812\n",
       "            ...    \n",
       "21608    318.302387\n",
       "21609     68.811285\n",
       "21610    297.852593\n",
       "21611    167.504188\n",
       "21612    302.044610\n",
       "Length: 21613, dtype: float64"
      ]
     },
     "execution_count": 60,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[\"price_per_sq_lot\"] = df[\"price\"] / df[\"sqft_lot\"]\n",
    "avg = df[\"price\"] / df[\"sqft_lot\"]\n",
    "avg"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### 4. Создать новый признак delta_renovated, который будет содержать разницу в годах между годом реновации дома и годом постройки дома и сохраните в renv. Если реновации дома не было, то в новом признаке поставьте 0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count    21613.000000\n",
       "mean         2.380882\n",
       "std         12.359528\n",
       "min          0.000000\n",
       "25%          0.000000\n",
       "50%          0.000000\n",
       "75%          0.000000\n",
       "max        114.000000\n",
       "Name: delta_renovated, dtype: float64"
      ]
     },
     "execution_count": 68,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['delta_renovared'] = 0\n",
    "df.loc[df['yr_renovated'] != 0, 'delta_renovared'] = df['yr_renovated'] - df['yr_built']\n",
    "\n",
    "# И второй вариант решения:\n",
    "# df[\"delta_renovared\"] = df.apply(lambda row: row[\"yr_renovated\"] - row[\"yr_built\"] if row[\"yr_renovated\"] > 0 else 0, axis=1)\n",
    "\n",
    "# И третий вариант решения:\n",
    "# df[\"delta_renovared\"] = np.where(df[\"yr_renovated\"] > 0, df[\"yr_renovated\"] - df[\"yr_built\"], 0)\n",
    "\n",
    "renv = df.delta_renovared"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### 5. Создайте признаки года продажи, месяца продажи и сохраните в year_ch, month_ch"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {},
   "outputs": [],
   "source": [
    "df['date'] = pd.to_datetime(df['date'])\n",
    "year_ch = df['date'].apply(lambda x: x.date().year)\n",
    "month_ch = df['date'].apply(lambda x: x.date().month)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### 6. Удалите признаки date, zipcode, lat, long и сохраните в new"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "new = df.drop(columns=[\"date\", \"zipcode\", \"lat\", \"long\"], inplace=True)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
